## Figure 1: Firm-Politician Links based on Campaign Donations and Lobbying 
# Activities:: -----------------------------------------------------------------

## Instructions ----------------------------------------------------------------
# Step 1: Adjust MAIN_DIR to where README.txt is located
# Step 2: Adjust DATA_DIR to where Data is is located
# Step 3: Adjust FIG_DIR to where Output folder is
# Step 4: Run entire script


## setup -----------------------------------------------------------------------
# clean slate
rm(list = ls())

# load packages
library(tidyverse)
options(scipen=999)

# set directory
MAIN_DIR <- "C:/Users/js.egb/Dropbox/campaign-lobby-paper/replication_package"

# set data folder
DATA_DIR <-  paste(MAIN_DIR, "/data/", sep = "")

# set output folder
FIG_DIR <- paste(MAIN_DIR, "/output/", sep = "")

# file to be read in
file_in1 <- paste0(DATA_DIR, "d_final_20210127.RData") # file to be input
file_in2 <- paste0(DATA_DIR, "senate_fec_to_govtrack_fastlink_COMPLETE.csv") # file to be input
file_in3 <- paste0(DATA_DIR, "compustat_1954_2018_deduped.csv") # file to be input


# Senator Data


# load lobbying and donations data
load(file_in1)  # <- Modify depending on your path
cong <- 113 # set congress to subset
senate <- readr::read_csv(file_in2) %>%
  filter(congress == cong)

d_final <- d_final %>% filter(congress == cong, govtrack_id %in% unique(senate$govtrack_id))

# get top politicians acc. to number of connections
toppols <- d_final %>% 
  replace_na(list(don_tot = 0, lobbying_amount = 0, is.sponsor = 0, is.cosponsor = 0, is.committee = 0)) %>%
  mutate(don_bin = ifelse(don_tot>0 & (lobbying_amount == 0 | (is.sponsor==0 & is.cosponsor==0 & is.committee == 0)), 1, 0), 
         lob_bin = ifelse((lobbying_amount > 0 | (is.sponsor==1 | is.cosponsor==1 | is.committee > 0)) & don_tot==0, 1, 0),
         don_lob = ifelse((lobbying_amount > 0 | (is.sponsor==1 | is.cosponsor==1 | is.committee > 0)) & don_tot > 0, 1, 0)) %>%
  group_by(govtrack_id) %>% 
  summarise(don = sum(don_tot, na.rm = TRUE),
            lob = sum(lobbying_amount[is.sponsor==1 | is.cosponsor==1 | is.committee>0 | lobbying_amount > 0], na.rm = TRUE),
            tot = don+lob,
            don_con = sum(don_bin, na.rm = TRUE),
            lob_con = sum(lob_bin, na.rm = TRUE),
            don_lob_con = sum(don_lob, na.rm = TRUE),
            n_con = don_con + lob_con + don_lob_con) %>%
  mutate(lob_con_share = lob_con/(n_con),
         don_con_share = don_con/(n_con),
         don_lob_con_share = don_lob_con/(n_con)) %>%
  arrange(-don_lob_con_share) 

toppols$yax_pol <- nrow(toppols):1 # rank column. Needs to be reversed for politicians, for them to be up in graph


# get top firms acc. to number of connections
topfirms <- d_final %>% 
  replace_na(list(don_tot = 0, lobbying_amount = 0, is.sponsor = 0, is.cosponsor = 0, is.committee = 0)) %>%
  group_by(gvkey, govtrack_id) %>%
  mutate(don_bin = ifelse(don_tot>0 & (lobbying_amount == 0 | (is.sponsor==0 & is.cosponsor==0 & is.committee == 0)), 1, 0),
         lob_bin = ifelse((lobbying_amount > 0 | (is.sponsor==1 | is.cosponsor==1 | is.committee > 0)) & don_tot==0, 1, 0),
         don_lob = ifelse((lobbying_amount > 0 | (is.sponsor==1 | is.cosponsor==1 | is.committee > 0)) & don_tot > 0, 1, 0)) %>%
  group_by(gvkey) %>%
  summarise(don = sum(don_tot, na.rm = TRUE),
            lob = sum(lobbying_amount[is.sponsor==1 | is.cosponsor==1 | is.committee>0 | lobbying_amount > 0], na.rm = TRUE),
            tot = don+lob,
            don_con = sum(don_bin, na.rm = TRUE),
            lob_con = sum(lob_bin, na.rm = TRUE),
            don_lob_con = sum(don_lob, na.rm = TRUE),
            n_con = don_con + lob_con + don_lob_con,
            n_lines = n()) %>%
  mutate(lob_con_share = lob_con/(n_con),
         don_con_share = don_con/(n_con),
         don_lob_con_share = don_lob_con/(n_con)) %>%
  arrange(-don_lob_con_share)

# DF with names and parties
pol_names <- d_final %>% 
  mutate(pol_name = paste0(first_name, " ", last_name, " (", party, "-", state, ")")) %>%
  select(govtrack_id, pol_name) 
pol_names <- pol_names[-which(duplicated(pol_names$govtrack_id)),]

# DF with unique firm names
firm_names <- d_final %>% 
  ungroup(candidate_id) %>%
  select(gvkey, conml) 
firm_names <- firm_names[-which(duplicated(firm_names$gvkey)),]


# # Check how many donate/lobby
# (sum(topdyads$don>0)/nrow(topdyads)) # 35% donate
# (sum(topdyads$lob>0)/nrow(topdyads)) # 45% lobby
# (sum(topdyads$lob>0 & topdyads$don>0)/nrow(topdyads)) # 11% do both
# sum(topdyads$tot[topdyads$don>0 & topdyads$lob>0])/sum(topdyads$tot) # But 50% of Money


# firm names
compustat <- readr::read_csv(file_in3) %>% 
  select(gvkey, conml, loc) %>% filter(loc == "USA") %>% distinct()


#### Flow Graph 1: Top 150 firms 
n_firms <- 150
n_pols <- nrow(toppols)

plotfirms <- topfirms %>% 
  left_join(compustat, by = c("gvkey")) %>%
  filter(!is.na(conml)) %>% # need to check. Some firms have no compusta name, maybe use UUID instead
  slice(1:n_firms) %>%
  mutate(yax_firm = n_firms:1,
         conml = gsub("( \\(The\\))|( \\& Co)|", "", conml, fixed = FALSE),
         yax_firm_plot = rev(seq(c(n_pols/n_firms), n_pols, c(n_pols/n_firms)))) %>%
  mutate(conml = gsub("E. I. du Pont de Nemours and Co", "DuPont Company", conml, fixed = TRUE)) %>%
  mutate(conml = gsub("( Co$)", " Co.", conml, fixed = FALSE)) %>%
  mutate(conml = gsub("Corp.", " Corp", conml, fixed = TRUE)) %>%
  mutate(conml = gsub("Corp", " Corp.", conml, fixed = TRUE)) %>%
  mutate(conml = gsub("Inc", " Inc.", conml, fixed = TRUE)) %>%
  mutate(conml = gsub("Plc", " Plc.", conml, fixed = TRUE)) %>%
  mutate(conml = gsub("( +Corp\\.)|( +Co\\.)|( +Inc\\.)|( +Plc\\.)|( +Inc\\.\\.)", "", conml, fixed = FALSE)) %>%
  mutate(conml = trimws(conml, which = "both"))



plotdyads <- d_final %>% 
  select(gvkey, govtrack_id, report_year, lobbying_amount, don_tot, is.sponsor, is.cosponsor, is.committee) %>%
  filter(gvkey %in% plotfirms$gvkey, govtrack_id %in% toppols$govtrack_id) %>%
  replace_na(list(don_tot = 0, lobbying_amount = 0, is.sponsor = 0, is.cosponsor = 0, is.committee = 0)) %>%
  mutate(don_bin = ifelse(don_tot>0 & lobbying_amount == 0, 1, 0), 
         lob_bin = ifelse((lobbying_amount > 0 | (is.sponsor==1 | is.cosponsor==1 | is.committee > 0)) & don_tot==0, 1, 0),
         don_lob = ifelse((lobbying_amount > 0 | (is.sponsor==1 | is.cosponsor==1 | is.committee > 0)) & don_tot > 0, 1, 0)) %>%
  left_join(pol_names[,c("govtrack_id", "pol_name")], by = "govtrack_id") %>%
  left_join(firm_names, by = "gvkey" ) %>%
  left_join(plotfirms[,c("gvkey", "yax_firm", "yax_firm_plot")], by = c("gvkey")) %>%
  left_join(toppols[,c("govtrack_id", "n_con", "don_con_share", "don_lob_con_share", "lob_con_share", "yax_pol")], by = c("govtrack_id")) %>%
  ungroup() %>%
  arrange(-yax_pol) 


###### START PLOTTING
pdf(paste0(FIG_DIR, "figure_1.pdf"), height = 9)
#png("../../output/figures_paper/flowgraph_orangebluenavy_150firms.png", units="px", width=1800*2, height=2600*2, res=600)
par(las = 1, mar = c(0, 1, 4, 2), oma = c(0,0,0,0), cex.axis = 0.3, cex.main = 1.2)
plot(1, type="n", xlab="", ylab="", axes = FALSE, 
     main = paste0("U.S. Senators & Top ", n_firms, " Lobbying Firms, ", cong, "th Congress"), 
     xlim=c(-4, 10), ylim=c(0, length(unique(plotdyads$govtrack_id)))) # empty plot

# Legend
text(x = 6.12, y = 113.25-5, labels = c("Lobbying Only"), font = 2, cex = 0.4, adj = 1) # draw y axis
polygon(x = c(5,5,6,6), 
        y = c(111.5, 111.5+0.7, 111.5+0.7,111.5)-5,
        col = rgb(1/255, 75/255, 122/255, alpha = 1), lwd = 0.1)

text(x = 4.125, y = 113.25-5, labels = c("Donations Only"), font = 2, cex = 0.4, adj = 1) # draw y axis
polygon(x = c(3,3,4,4), 
        y = c(111.5, 111.5+0.7, 111.5+0.7,111.5)-5,
        col = rgb(69/255, 164/255, 194/255, alpha = 1), lwd = 0.1)

text(x = 2.375, y = 113.25-5, labels = c("Lobbying & Donations"), font = 2, cex = 0.4, adj = 1) # draw y axis
polygon(x = c(1,1,2,2), 
        y = c(111.5, 111.5+0.7, 111.5+0.7,111.5)-5,
        col = rgb(254/255, 175/255, 73/255, alpha = 1), lwd = 0.1)

# Firm Names
for (j in 1:length(plotfirms$gvkey)) {
  text(x = 0, y = plotfirms$yax_firm_plot[j], labels = plotfirms$conml[j], adj = 1, cex = 0.2925)
}

# Politician Names
for (k in 1:length(unique(plotdyads$pol_name))) {
  text(x = 7, y = toppols$yax_pol[k], labels = unique(plotdyads$pol_name)[k], adj = 0, cex = 0.3)
}


# plot share of donation-lobby connections vs lobby connections per politician
for (l in 1:nrow(toppols)) {
  # first, plot a full rectangle for lobbying connections
  polygon(x = c(8,8,9,9)+0.75, 
          y = c(toppols$yax_pol[l], toppols$yax_pol[l]+0.7, toppols$yax_pol[l]+0.7,toppols$yax_pol[l])-0.3,
          col = rgb(1/255, 75/255, 122/255, alpha = 1), lwd = 0.1)
  # second plot the share of donation connections
  polygon(x = c(8,8,8+toppols$don_lob_con_share[l]+toppols$don_con_share[l],8+toppols$don_lob_con_share[l]+toppols$don_con_share[l])+0.75, 
          y = c(toppols$yax_pol[l], toppols$yax_pol[l]+0.7, toppols$yax_pol[l]+0.7,toppols$yax_pol[l])-0.3,
          col = rgb(69/255, 164/255, 194/255, alpha = 1), lwd = 0.1)
  
  # third plot the share of donation+lobbying connections
  polygon(x = c(8,8,8+toppols$don_lob_con_share[l],8+toppols$don_lob_con_share[l])+0.75, 
          y = c(toppols$yax_pol[l], toppols$yax_pol[l]+0.7, toppols$yax_pol[l]+0.7,toppols$yax_pol[l])-0.3,
          col = rgb(254/255, 175/255, 73/255, alpha = 1), lwd = 0.1)
  
}



# plot share of donation-lobby connections vs lobby connections per Firm
for (l in 1:nrow(plotfirms)) {
  # first, plot a full rectangle for lobbying connections
  polygon(x = c(-2,-2,-1,-1)-1.25, 
          y = c(plotfirms$yax_firm_plot[l], plotfirms$yax_firm_plot[l]+0.5, plotfirms$yax_firm_plot[l]+0.5,plotfirms$yax_firm_plot[l])-0.3,
          col = rgb(1/255, 75/255, 122/255, alpha = 1), lwd = 0.1)
  # second plot the share of donation connections
  polygon(x = c(-2,-2,-2+plotfirms$don_lob_con_share[l]+plotfirms$don_con_share[l],-2+plotfirms$don_lob_con_share[l]+plotfirms$don_con_share[l])-1.25, 
          y = c(plotfirms$yax_firm_plot[l], plotfirms$yax_firm_plot[l]+0.5, plotfirms$yax_firm_plot[l]+0.5,plotfirms$yax_firm_plot[l])-0.3,
          col = rgb(69/255, 164/255, 194/255, alpha = 1), lwd = 0.1)
  
  # third plot the share of donation+lobbying connections
  polygon(x = c(-2,-2,-2+plotfirms$don_lob_con_share[l],-2+plotfirms$don_lob_con_share[l])-1.25, 
          y = c(plotfirms$yax_firm_plot[l], plotfirms$yax_firm_plot[l]+0.5, plotfirms$yax_firm_plot[l]+0.5,plotfirms$yax_firm_plot[l])-0.3,
          col = rgb(254/255, 175/255, 73/255, alpha = 1), lwd = 0.1)
  
}



# draw POLIICIAN Y axis. Make sure that top politicians in terms of lobbying/donations received are on top
text(x = 0, y = 112-5, labels = c("Firm"), font = 2, cex = 0.75, adj = 1) # draw y axis
text(x = 7, y = 112-5, labels = c("Politician"), font = 2, cex = 0.75, adj = 0) # draw y axis
text(x = 8.6, y = 112.5-5, labels = c("    Share of\nConnections"), font = 2, cex = 0.5, adj = 0) # draw y axis
text(x = -3.3, y = 112.5-5, labels = c("   Share of\nConnections"), font = 2, cex = 0.5, adj = 0) # draw y axis


# now draw lines from 0 to x-axis length of the graph, using the y axis lengths from the plotdyads DF
for (i in 1:nrow(plotdyads[plotdyads$lob_bin==1,])){
  par(new = TRUE, lwd = 0.1) # lwd should scale line widht acc. to total money share, but seems not to work.
  plot(x = c(0.1,6.9), y = c(jitter(plotdyads$yax_firm_plot[plotdyads$lob_bin==1][i], factor = 0.1), jitter(plotdyads$yax_pol[plotdyads$lob_bin==1][i], factor = 0.175)), # plot, add noise to right hand side
       type = "l", axes = FALSE, ylab = "", xlab = "", 
       col = rgb(1/255, 75/255, 122/255, alpha = 0.075), 
       xlim=c(-4, 10), ylim=c(0, length(unique(plotdyads$govtrack_id))))
  #}
  if (i %% 1000 == 0) {
    print(paste0("Blue: processing line ", i))  
  }
}


for (i in 1:nrow(plotdyads[plotdyads$don_bin==1,])){
  par(new = TRUE, lwd = 0.1) # lwd should scale line widht acc. to total money share, but seems not to work.
  plot(x = c(0.1,6.9), y = c(jitter(plotdyads$yax_firm_plot[plotdyads$don_bin==1][i], factor = 0.1), jitter(plotdyads$yax_pol[plotdyads$don_bin==1][i], factor = 0.175)), # plot, add noise to right hand side
       type = "l", axes = FALSE, ylab = "", xlab = "", 
       col = rgb(69/255, 164/255, 194/255, alpha = 0.1), 
       xlim=c(-4, 10), ylim=c(0, length(unique(plotdyads$govtrack_id))))
  #}
  if (i %% 1000 == 0) {
    print(paste0("Navy: processing line ", i))  
  }
}

for (i in 1:nrow(plotdyads[plotdyads$don_lob==1,])){
  par(new = TRUE, lwd = 0.1) # lwd should scale line widht acc. to total money share, but seems not to work.
  plot(x = c(0.1,6.9), y = c(jitter(plotdyads$yax_firm_plot[plotdyads$don_lob==1][i], factor = 0.1), jitter(plotdyads$yax_pol[plotdyads$don_lob==1][i], factor = 0.175)), # plot, add noise to right hand side
       type = "l", axes = FALSE, ylab = "", xlab = "",
       col = rgb(254/255, 175/255, 73/255, alpha = 0.1),
       xlim=c(-4, 10), ylim=c(0, length(unique(plotdyads$govtrack_id))))
  
  if (i %% 1000 == 0) {
    print(paste0("Orange: processing line ", i))
  }
}

dev.off()

